/* Matthew C Mahutga, Michaela Curran, and Anthony Roberts
   matthew.mahutga@ucr.edu
   Job Tasks and the Comparative Structure of Income and Employment: Routine Task Intensity and Offshorability for the LIS
   International Journal of Comparative Sociology 2018
   
   Description: This script builds the RTI/OFFS dataset from the Microsoft Excel file that contains all of the LIS occupational category recodes (occ1_c).
   Please adjust the file path to your LISSY dataset address where noted with [adjust file path]. See script instructions in the user guide for additional details.
   Date Modified: February 5, 2018*/
   
/* Step 1: Send the RTI / OFFS dataset as an attachment (zipped file) to LIS via email at usersupport@lisdatacenter.org. 
   Please note: LIS must review the file before uploading it to their server. This may take time.
   After your file is accepted, LIS will send you a LISSY address for it. 
   The address will follow this format: $mydata/mcurra/lis_external_merge.dta (where mcurra is the user's LISSY user name and lis_external_merge.dta is the name of the uploaded file).
   You can access the uploaded file in LISSY, in .dta format, at this address.
   Please adjust the file path to this address wherever [adjust file path] appears. */

/* Step 2: Log into LISSY and append the relevant datasets using Stata code such as the example below.
   This example appends only the validation country-years and variables.
   To save to your user file: save $mydata/mcurra/append1.dta, replace */

* Each loop appends the LIS person-level files (global macros ending in "p")
* for one country, keeping only the variables used by the validation analyses.
* occ1_c is kept because Step 3 merges on dname and occ1_c; without it the
* merge key would not exist in the appended data.

* Finland 1995, 2000, 2004, 2010
foreach lisfile in $fi95p $fi00p $fi04p $fi10p {
	quietly: append using `lisfile', keep(dname cname year emp pil ppopwgt occ1_c)
}

* Hungary 1991, 1994, 1999, 2005, 2009
foreach lisfile in $hu91p $hu94p $hu99p $hu05p $hu09p {
	quietly: append using `lisfile', keep(dname cname year emp pil ppopwgt occ1_c)
}

* Netherlands 1999, 2004, 2010
foreach lisfile in $nl99p $nl04p $nl10p {
	quietly: append using `lisfile', keep(dname cname year emp pil ppopwgt occ1_c)
}

* Spain 1990, 1995, 2004, 2010
* The macros carry the "p" suffix like every other country above; without it
* the macros are presumably undefined and the loop would append nothing.
foreach lisfile in $es90p $es95p $es04p $es10p {
	quietly: append using `lisfile', keep(dname cname year emp pil ppopwgt occ1_c)
}
/* Step 3: Merge the appended files to the external file that contains the RTI and OFFS scores
   To save to your user file: save $mydata/mcurra/append1.dta, replace */

* Many-to-one merge of the appended person records onto the external file,
* matching on dataset name (dname) and occupation code (occ1_c).
merge m:1 dname occ1_c using $mydata/mcurra/lis_external_merge.dta // [adjust file path]

/* Step 4: Check the _merge variable.
   The validation analyses use matched records only (_merge==3). */

* Discard everything that is not a match, then keep the merge indicator
* under a descriptive name.
drop if _merge != 3
rename _merge validationmerge

/* Step 4 (continued): Trim and generate labor income and total income variables for the analyses */
* Sample restriction: remove records with emp equal to 0 and records with
* zero or negative pil.
keep if emp != 0
keep if pil > 0

* Display the detailed distribution of pil within each dataset.
by dname, sort: summarize pil, detail

* Within-dataset 1st and 99th percentiles of pil.
egen pil1  = pctile(pil), p(1)  by(dname)
egen pil99 = pctile(pil), p(99) by(dname)

* pil_pct keeps pil only inside the [pil1, pil99] band; it is missing otherwise.
by dname, sort: generate pil_pct = pil if pil1 <= pil & pil <= pil99

* Express trimmed pil relative to the within-dataset median of trimmed pil.
egen median_pil1 = median(pil_pct), by(dname)
generate labor_income = pil_pct / median_pil1

/* Step 5: Destring analysis variables */
* Convert any string-typed analysis variables to numeric in place.
destring labor_income ppopwgt, replace

/* Step 6: Create data for Figure 1 using the "table" command */
* Frequency of observations per dataset.
table dname
* recode is 1 whenever isco_88 is not equal to 1, and 0 otherwise.
generate recode = (isco_88 != 1)
* Declare ppopwgt as the sampling (probability) weight for svy: commands.
* NOTE(review): the table command below carries no weight specification of
* its own - confirm whether unweighted means are intended for Figure 1.
svyset [pweight = ppopwgt]
* Mean labor_income by isco_88_r and recode, separately for each dataset.
table isco_88_r recode, by(dname) contents(mean labor_income)

/* Step 7: Run country-specific regressions comparing ISCO_88 income estimates across aggregate recode */              
* Finland - compare 1995 (recode year) to 2000 (ISCO-88 year) and 2004 to 2010 for baseline
* Recode model: occupation-by-recode interactions on the fi95/fi00 pair.
svy: regress labor_income i.isco_88_r##i.recode if dname=="fi95" | dname=="fi00"
estimates store firecode
* Baseline model: occupation-by-year interactions on the fi04/fi10 pair.
* The base year is set explicitly so that the 2010.year interactions tested
* below exist regardless of any earlier fvset on year.
fvset base 2004 year
svy: regress labor_income i.isco_88_r##i.year if dname=="fi04" | dname=="fi10"
estimates store fibase
suest firecode fibase, robust
* After suest, each regress model is held in two equations, <name>_mean and
* <name>_lnvar, so the coefficients are addressed through the _mean equation.
* One test per occupation code: baseline year interaction = recode interaction.
foreach occ of numlist 11/13 21/24 31/34 41 42 51 52 61 62 71/74 81/83 91/93 {
	test _b[fibase_mean:`occ'.isco_88_r#2010.year] = _b[firecode_mean:`occ'.isco_88_r#1.recode]
}
    
* Hungary - compare 1991 (recode year) to 1994 (ISCO-88 year) and 1994 to 1999 for baseline
* Recode model on the hu91/hu94 pair, with 1994 as the base year.
fvset base 1994 year
svy: regress labor_income i.isco_88_r##i.recode i.year if dname=="hu91" | dname=="hu94"
estimates store hurecode
* Baseline model on the hu94/hu99 pair.
* NOTE(review): the tests below address the 1994.year interactions. With 1994
* as the base year those terms are omitted, so the base is switched to 1999
* here (mirroring the Netherlands section, where the later year is the base).
* Confirm this matches the published analysis; the alternative is to keep
* base 1994 and test the 1999.year interactions instead.
fvset base 1999 year
svy: regress labor_income i.isco_88_r##i.year if dname=="hu94" | dname=="hu99"
estimates store hubase
suest hurecode hubase, robust
* After suest, each regress model is held in two equations, <name>_mean and
* <name>_lnvar, so the coefficients are addressed through the _mean equation.
* NOTE(review): the original tests for codes 13, 22 and 42 ended in an
* unbalanced ")" and the test for code 11 named "#2004.1994"; all codes are
* now tested as plain equalities on 1994.year. Confirm that none of them was
* meant to carry the (-1*...) form used in the 2005/2009 Hungary section.
foreach occ of numlist 11/13 21/24 31/34 41 42 51 52 61 62 71/74 81/83 91/93 {
	test _b[hubase_mean:`occ'.isco_88_r#1994.year] = _b[hurecode_mean:`occ'.isco_88_r#1.recode]
}

* Hungary - Compare 2005 (ISCO-88 year) to 2009 (recode year) and 1999 to 2005 for baseline
fvset base 1999 year
* Recode model on the hu05/hu09 pair.
svy: regress labor_income i.isco_88_r##i.recode i.year if dname=="hu05" | dname=="hu09"
estimates store hurecode
* Baseline model on the hu99/hu05 pair (base year 1999, so 2005.year is estimated).
svy: regress labor_income i.isco_88_r##i.year if dname=="hu99" | dname=="hu05"
estimates store hubase
suest hurecode hubase, robust
* After suest, each regress model is held in two equations, <name>_mean and
* <name>_lnvar, so the coefficients are addressed through the _mean equation.
* Codes 23, 33 and 81 are tested against the recode coefficient multiplied by
* -1, exactly as in the original hand-written tests; all other codes are
* tested as plain equalities.
foreach occ of numlist 11/13 21/24 31/34 41 42 51 52 61 62 71/74 81/83 91/93 {
	local recode_coef "_b[hurecode_mean:`occ'.isco_88_r#1.recode]"
	if inlist(`occ', 23, 33, 81) {
		local recode_coef "(-1*`recode_coef')"
	}
	test _b[hubase_mean:`occ'.isco_88_r#2005.year] = `recode_coef'
}

* Netherlands - Compare 1999 (recode year) to 2004 (ISCO-88 year) and 2004 to 2010 for baseline
* Recode model on the nl99/nl04 pair.
svy: regress labor_income i.isco_88_r##i.recode if dname=="nl99" | dname=="nl04"
estimates store nlrecode
* Baseline model on the nl04/nl10 pair; 2010 is the base year, so the
* 2004.year interactions are the estimated terms.
fvset base 2010 year
svy: regress labor_income i.isco_88_r##i.year if dname=="nl04" | dname=="nl10"
estimates store nlbase
suest nlrecode nlbase, robust
* After suest, each regress model is held in two equations, <name>_mean and
* <name>_lnvar, so the coefficients are addressed through the _mean equation.
* One test per occupation code: baseline year interaction = recode interaction.
foreach occ of numlist 11/13 21/24 31/34 41 42 51 52 61 62 71/74 81/83 91/93 {
	test _b[nlbase_mean:`occ'.isco_88_r#2004.year] = _b[nlrecode_mean:`occ'.isco_88_r#1.recode]
}
        
* Spain - Compare 1990 (recode year) to 1995 (ISCO-88 year) and 2004 to 2010 for baseline
* Recode model on the es90/es95 pair.
svy: regress labor_income i.isco_88_r##i.recode if dname=="es90" | dname=="es95"
estimates store esrecode
* Baseline model on the es04/es10 pair; 2004 is the base year, so the
* 2010.year interactions are the estimated terms.
fvset base 2004 year
svy: regress labor_income i.isco_88_r##i.year if dname=="es04" | dname=="es10"
estimates store esbase
suest esrecode esbase, robust
* After suest, each regress model is held in two equations, <name>_mean and
* <name>_lnvar, so the coefficients are addressed through the _mean equation.
* One test per occupation code: baseline year interaction = recode interaction.
foreach occ of numlist 11/13 21/24 31/34 41 42 51 52 61 62 71/74 81/83 91/93 {
	test _b[esbase_mean:`occ'.isco_88_r#2010.year] = _b[esrecode_mean:`occ'.isco_88_r#1.recode]
}

*** End of Validation Analyses ***
* Stop processing the do-file here; nothing below this line is executed.
exit
